package com.app.core.util;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Static helpers for reducing a host name or URL to its "main" (registrable-looking) domain,
 * e.g. {@code a.b.example.com -> example.com}.
 *
 * <p>Only the suffixes {@code .com}, {@code .net} and {@code .org} are reduced to their last
 * two labels; hosts with any other suffix are returned unchanged (after port/path removal).
 */
public class WebUtil {

    /**
     * Matches the first dotted host-like token in a string: one or more labels followed by a
     * dot, then a final label. Labels may contain word characters and hyphens, so hosts such
     * as {@code my-site.com} are captured whole instead of being cut at the hyphen.
     */
    private static final Pattern HOST_PATTERN = Pattern.compile("([\\w-]+\\.)+[\\w-]+");

    /**
     * Small manual demo: prints the main domain extracted from a handful of sample URLs.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String[] samples = {
            "aa.bb.cc.dd.com:8080/asdfasf/afdasdf/asdf.html",
            "aa.bb.cc.dd.com/asdfasf/afdasdf/asdf.html",
            "www.aa.bb.cc.dd.com/asdfasf/afdasdf/asdf.html",
            "www.aa.bb.cc.dd.com:8080/asdfasf/afdasdf/asdf.html",
            "www.dd.com/asdfasf/afdasdf/asdf.html",
        };
        for (String sample : samples) {
            System.out.println(getMainDomainFromUrl(sample));
        }
    }

    /**
     * Reduces a host name (optionally followed by {@code :port} and/or {@code /path}) to its
     * main domain.
     *
     * <ul>
     *   <li>{@code null} input returns {@code null}.</li>
     *   <li>{@code localhost} and {@code 127.0.0.1} (with or without port/path) return
     *       {@code null}.</li>
     *   <li>Hosts ending in {@code .com}, {@code .net} or {@code .org} are reduced to their
     *       last two labels.</li>
     *   <li>Any other host is returned as-is, with port and path removed.</li>
     * </ul>
     *
     * @param domain host name, possibly with a trailing port and/or path; may be {@code null}
     * @return the main domain, the stripped host when no reduction applies, or {@code null}
     *         for {@code null}/loopback input
     */
    public static String getMainDomain(String domain) {
        if (domain == null) {
            return null;
        }

        // Strip path and port first. indexOf-based cutting is used instead of split(...)[0]
        // because split drops trailing empty strings and yields an empty array for inputs
        // like "/" or ":", which would throw ArrayIndexOutOfBoundsException.
        String host = substringBefore(substringBefore(domain, '/'), ':');

        // Checked after stripping so that "localhost:8080" is treated like "localhost".
        if (host.equals("localhost") || host.equals("127.0.0.1")) {
            return null;
        }

        if (!host.endsWith(".com") && !host.endsWith(".net") && !host.endsWith(".org")) {
            return host;
        }

        // The suffix check guarantees at least one dot; keep everything after the
        // second-to-last dot (or the whole host when there is only one dot).
        int lastDot = host.lastIndexOf('.');
        int previousDot = host.lastIndexOf('.', lastDot - 1);
        return host.substring(previousDot + 1);
    }

    /**
     * Finds the first dotted host-like token in {@code url} and reduces it with
     * {@link #getMainDomain(String)}.
     *
     * <p>The search is lenient: it does not require a scheme and simply takes the first
     * token of the form {@code label.label[.label...]} found anywhere in the string.
     *
     * @param url URL or URL-like text; may be {@code null}
     * @return the main domain of the first host-like token, or {@code null} when
     *         {@code url} is {@code null}, contains no such token, or the token is a
     *         loopback address
     */
    public static String getMainDomainFromUrl(String url) {
        if (url == null) {
            return null;
        }

        Matcher m = HOST_PATTERN.matcher(url);
        return m.find() ? getMainDomain(m.group()) : null;
    }

    /** Returns the part of {@code text} before the first {@code delimiter}, or all of it. */
    private static String substringBefore(String text, char delimiter) {
        int idx = text.indexOf(delimiter);
        return idx < 0 ? text : text.substring(0, idx);
    }
}
